clear all
set more off, perm
set mem 10000000
set matsize 10000

*************************************************** 
*** DD regressions: reduced-form DISE outcomes ****
*************************************************** 

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

********************************************************************************
********************************************************************************

** 1. Run difference in discontinuities for DISE 
{
use "$panel/panel_dataset_dd.dta", clear
keep if vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
drop p_300_X-band300_p_200
drop bin25_p-within_border_100k
drop vplan vplan2 vplan3 vdpr vdpr2 vdpr3
drop treat_x_post dd_* stateyear
drop lights_mean_hat lights_max_hat
drop no_hh tot_p tot_m tot_f vd01_id conc_id v_id_hpca11 vd11_id c_code01 v_id_master ///
	h?_id h?v_id f_area totp_h? count_h? maxp_h? grad_max grad_mean

merge 1:1 pca01_id st_code dt_code vplan4 vdpr4 year using ///
	"$panel/panel_dataset_lights_allyears.dta", keep(3) nogen
egen temp1 = max(lights_max*(year==2001)), by(pca01_id)
egen temp2 = max(lights_max*(year==2011)), by(pca01_id)
gen lights_diff = abs(temp2-temp1)
assert lights_diff!=.
	
egen stdtbk = group(stdt bk_code)
gen X = (tot_p01-300)
assert X!=.
gen D = X>=0
gen DX = D*X

gen aw150 = abs(150-abs(300-tot_p01))
gen aw100 = abs(100-abs(300-tot_p01))

	// Merge in DISE panel
preserve
use "$panel/pca_school_selected_panel_new.dta", clear	
foreach p in all boys girls {
	gen g15_enroll_`p' = g1_enroll_`p'+g2_enroll_`p'+g3_enroll_`p'+g4_enroll_`p'+g5_enroll_`p'
	gen g68_enroll_`p' = g6_enroll_`p'+g7_enroll_`p'+g8_enroll_`p'
}
keep pca01_id school_year tot_enroll* g15_enroll* g68_enroll* tot_pass_prev?? tot_pass60_prev??
rename school_year year
tempfile dise
save `dise'
restore
merge 1:1 pca01_id year using `dise'

	// Drop DISE villages not in RD sample
egen temp = min(_merge), by(pca01_id)
keep if temp==1	
tab year
drop temp

	// Drop non-DISE villages
egen temp = max(_merge), by(pca01_id)
keep if temp>1	
tab year
drop temp


	// Populate variables for regression
foreach v of varlist D X DX tot_p01 aw150 aw100 stdt stdtbk st_code pop_mismatch20 lights_diff {
	egen temp = mode(`v'), by(pca01_id)
	replace `v' = temp if `v'==.
	assert `v'!=.
	drop temp
}
drop if year==2001


	// Assess test score coverage
foreach v of varlist tot_pass_prev?? tot_pass60_prev?? {
	gen temp = `v'!=. & `v'!=0
	tabstat temp, by(year)
	drop temp
}
	// omit 2010-2013 from these regs


gen yvar = ""
gen fes = ""
gen vce = ""
gen bw = .
foreach y in 2006 2007 2008 2009 2010 2011 2012 2013 2014 {
	gen beta`y' = .
	gen se`y' = .
	gen tscore`y' = .
	gen pvalue`y' = .
	gen ci95_lo`y' = .
	gen ci95_hi`y' = .
}
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
	
local vce = "cluster stdtbk"
local row = 1

	// Diff-in-disc regression loop: using all years, omitting 2005, and not using test scores (which are sparsely populated)
foreach yvar of varlist tot_enroll* g15_enroll* g68_enroll* {
	foreach bw in 100 150 {
		foreach fe in 1 2 {
			
			if `fe'==1 {
				local fes = "pca01_id year#stdt"
			}
			else if `fe'==2 {
				local fes = "pca01_id year#st_code"
			}
			
			reghdfe `yvar' ///
				1.D#2006.year 1.D#2007.year 1.D#2008.year 1.D#2009.year ///
				1.D#2010.year 1.D#2011.year 1.D#2012.year 1.D#2013.year ///
				1.D#2014.year c.X#i.year c.DX#i.year ///
				if inrange(tot_p01,300-`bw',300+`bw') & ///
				pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
				
			replace yvar = "`yvar'" in `row'
			replace fes = "`fes'" in `row'
			replace vce = "`vce'" in `row'
			replace bw = `bw' in `row'
			foreach y in 2006 2007 2008 2009 2010 2011 2012 2013 2014 {
				replace beta`y' = _b[1.D#`y'.year] in `row'
				replace se`y' = _se[1.D#`y'.year] in `row'	
				replace tscore`y' = beta`y'/se`y' in `row'
				replace pvalue`y' = 2*ttail(e(df_r),abs(_b[1.D#`y'.year]/_se[1.D#`y'.year])) in `row'
				replace ci95_lo`y' = beta`y' - se`y'*invttail(e(df_r),0.025) in `row'
				replace ci95_hi`y' = beta`y' + se`y'*invttail(e(df_r),0.025) in `row'
			}	
			sum `yvar' if e(sample)							
			replace ymean = r(mean)	in `row'
			replace nobs = e(N)	in `row'
			replace nclust = e(N_clust) in `row'
			replace rmse = e(rmse) in `row'
			local row = `row' + 1

		}
	}
}


	// Diff-in-disc regression loop: test scores only, omitting 2005, and excluding (not-populated 2010-2013)
foreach yvar of varlist tot_pass_prev?? tot_pass60_prev?? {
	foreach bw in 100 150 {
		foreach fe in 1 2 {
			
			if `fe'==1 {
				local fes = "pca01_id year#stdt"
			}
			else if `fe'==2 {
				local fes = "pca01_id year#st_code"
			}
			
			reghdfe `yvar' ///
				1.D#2006.year 1.D#2007.year 1.D#2008.year 1.D#2009.year ///
				///1.D#2010.year 1.D#2011.year 1.D#2012.year 1.D#2013.year ///
				1.D#2014.year c.X#i.year c.DX#i.year ///
				if inrange(tot_p01,300-`bw',300+`bw') & ///
				pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
				
			replace yvar = "`yvar'" in `row'
			replace fes = "`fes'" in `row'
			replace vce = "`vce'" in `row'
			replace bw = `bw' in `row'
			foreach y in 2006 2007 2008 2009 2014 {
				replace beta`y' = _b[1.D#`y'.year] in `row'
				replace se`y' = _se[1.D#`y'.year] in `row'	
				replace tscore`y' = beta`y'/se`y' in `row'
				replace pvalue`y' = 2*ttail(e(df_r),abs(_b[1.D#`y'.year]/_se[1.D#`y'.year])) in `row'
				replace ci95_lo`y' = beta`y' - se`y'*invttail(e(df_r),0.025) in `row'
				replace ci95_hi`y' = beta`y' + se`y'*invttail(e(df_r),0.025) in `row'
			}	
			sum `yvar' if e(sample)							
			replace ymean = r(mean)	in `row'
			replace nobs = e(N)	in `row'
			replace nclust = e(N_clust) in `row'
			replace rmse = e(rmse) in `row'
			local row = `row' + 1
				
		}
	}
}


keep yvar-rmse
dropmiss, obs force
compress
save "$results/diff_in_disc_outcomes_schools.dta", replace
	
}

********************************************************************************
********************************************************************************

** 2. DD pooled, and split by population bin (DISE 2005-2014)
{

use "$panel/panel_dataset_dd.dta", clear

	// Merge in DISE panel
preserve
use "$panel/pca_school_selected_panel_new.dta", clear	
foreach p in all boys girls {
	gen g15_enroll_`p' = g1_enroll_`p'+g2_enroll_`p'+g3_enroll_`p'+g4_enroll_`p'+g5_enroll_`p'
	gen g68_enroll_`p' = g6_enroll_`p'+g7_enroll_`p'+g8_enroll_`p'
}
keep pca01_id school_year tot_enroll* g15_enroll* g68_enroll* tot_pass_prev?? tot_pass60_prev??
rename school_year year
tempfile dise
save `dise'
restore
merge 1:1 pca01_id year using `dise'

	// Drop DISE villages that never merge
egen temp = min(_merge), by(pca01_id)
keep if temp==1	
tab year
drop temp

	// Drop non-DISE villages
egen temp = max(_merge), by(pca01_id)
keep if temp>1	
tab year
drop temp
drop _merge

	// Populate variables for regression
foreach v of varlist tot_p01 stdt st_code dt_code sample_1011 vplan4 {
	egen temp = mode(`v'), by(pca01_id)
	replace `v' = temp if `v'==.
	drop temp
}
drop if year==2001

	// Assess test score coverage
foreach v of varlist tot_pass_prev?? tot_pass60_prev?? {
	gen temp = `v'!=. & `v'!=0
	tabstat temp, by(year)
	drop temp
}
	// omit 2010-2013 from these regs

	// Bring in NSS expenditure trend variables
preserve
use "$panel/panel_dataset_dd_nss.dta", clear
keep st_code dt_code exp05*
duplicates drop
tempfile nss
save `nss'
restore
merge m:1 st_code dt_code using `nss', nogen
drop if pca01_id==.	
	
	// Bring in staggered RGGVY rollout variables
preserve
use "$rggvy/rggvy_district_progress_X_XI_processed.dta", clear
replace award_date = sanction_date if award_date<sanction_date
replace award_date = max(award_date,17553) if plan==11
collapse (min) min_award_date=award_date (max) max_award_date=award_date, ///
	by(st_code dt_code) fast
gen med_award_date = round((min_award_date+max_award_date)/2,1)
format %td med_award_date
tempfile award_dates
save `award_dates'
restore
merge m:1 st_code dt_code using `award_dates', keep(1 3)
assert _merge==3 if vplan4!=.
assert _merge==1 if vplan4==.
drop _merge

	// Bring in 2001 Census
preserve
use "$panel/panel_dataset_no_lights.dta", clear
keep pca01_id lit_p_01 work_p_01 work_m_01 work_f_01 vd_geo_a_01 vd_geo_a_irr_01 vd_geo_k_town_01 ///
	vd_edu_d_01 vd_hea_d_medi_01 vd_wat_d_any_01 vd_tra_d_bus_01 vdpr_tra_d_app_pr_01 vd_pwr_d_all_01 ///
	vdpr_pwr_d_oth_01 vd_pwr_d_agr_01 vd_pwr_d_dom_01 vd_fin_d_ac_soc_01 vd_pwr_d_any_01 pct_work_main_01
unique pca01_id
assert r(unique)==r(N)
tempfile vars2001
save `vars2001'	
restore
merge m:1 pca01_id using `vars2001', nogen keep(1 3)


	// DD 1: 10th plan * post-2005
gen dd_1_treat_x_post = vplan4<11 & year>2005
la var dd_1_treat_x_post "DD 1: post-2005 (both plans)"	

	// DD 2: 10th plan * post-award-year
gen award_year = year(med_award_date)
egen stdt_tag = tag(st_code dt_code)
tab award_year vplan4 if stdt_tag, missing
gen dd_2_treat_x_post = vplan4<11 & year>award_year
la var dd_2_treat_x_post "DD 2: post-award-year (10th plan)"	

	// DD 3: post-award-year
gen dd_3_treat_x_post = year>award_year
la var dd_3_treat_x_post "DD 3: post-award-year (both plans)"	
	
	// Create 500-person population bins (switching from 300 to 500 to align with NSS population medians)
drop popbin*	
gen popbin = 0
local bin = 500
local bmax = 3000
local bmax2 = 2900	
forvalues i = 0(`bin')`bmax2' {
  local iplus1 = `i' + `bin'
  gen popbin_`i'_`iplus1' = tot_p01>`i' & tot_p01<=`iplus1'
  la var popbin_`i'_`iplus1' "2001 population bin: (`i',`iplus1']"
  replace popbin = `i' if popbin_`i'_`iplus1'==1
}
gen popbin_`bmax'_max = tot_p01>`bmax'
replace popbin = `bmax' if popbin_`bmax'_max==1
la var popbin "Lower end of 2001 population bin"

	// DD population bins
foreach j in 1 2 3 {
	forvalues i = 0(`bin')`bmax2' {
	  local iplus1 = `i' + `bin'
	  gen dd_`j'_x_popbin_`i'_`iplus1' = dd_`j'_treat_x_post * popbin_`i'_`iplus1'
	  la var dd_`j'_x_popbin_`i'_`iplus1' "DD `j' treat x popbin: `i'-`iplus1'"
	}
	gen dd_`j'_x_popbin_`bmax'_max = dd_`j'_treat_x_post * popbin_`bmax'_max
	la var dd_`j'_x_popbin_`bmax'_max "DD `j' treat x popbin `bmax' to infinity"
}


	// REGRESSION LOOP
cap drop yvar-rmse
cap drop row_id
gen yvar = ""
gen dd_model = ""
gen dd_var = .
gen fes = ""
gen ifs = ""
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
forvalues p = 0(`bin')`bmax' {
	local p1 = `p'+`bin'
	if `p'==`bmax' {
		local p1 = "max"
	}
	gen beta_`p'_`p1' = .
	gen se_`p'_`p1' = .
	gen tscore_`p'_`p1' = .
	gen pvalue_`p'_`p1' = .
	gen ci95_lo_`p'_`p1' = .
	gen ci95_hi_`p'_`p1' = .
}
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
gen row_id = .

local row = 1

foreach dd_var in 1 2 3 {
	
	foreach dd_sample in 1 {
		
		if `dd_sample'==1 {
			local ifs = ""
		}
		else if `dd_sample'==2 {
			local ifs = " if sample_1011==1"
		}
		
		foreach yvar of varlist tot_enroll* g15_enroll* g68_enroll* tot_pass_prev?? tot_pass60_prev?? {
	
			foreach FEs in 1 2 3 4 5 {
				
				if `FEs'==1 {
					local fes = "pca01_id year"
				}
				else if `FEs'==2 {
					local fes = "pca01_id year exp05_st_4ile#c.year exp05_ntl_10ile#c.year"
				}
				else if `FEs'==3 {
					local fes = "pca01_id year exp05_st_4ile#c.year exp05_ntl_10ile#c.year c.year#st_code"
				}
				else if `FEs'==4 {
					local fes = "pca01_id year exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year c.(lit_p_01 work_p_01 work_f_01 vd_geo_a_01 vd_geo_k_town_01 vd_edu_d_01 vd_hea_d_medi_01 vd_wat_d_any_01 vd_tra_d_bus_01 vdpr_tra_d_app_pr_01 vd_pwr_d_all_01 vd_pwr_d_any_01 vdpr_pwr_d_oth_01 vd_pwr_d_agr_01 vd_pwr_d_dom_01 vd_fin_d_ac_soc_01)#year"
				}
				else if `FEs'==5 {
					local fes = "pca01_id year exp05_st_4ile#year exp05_ntl_10ile#year st_code#c.year c.(lit_p_01 work_p_01 work_f_01 vd_geo_a_01 vd_geo_k_town_01 vd_edu_d_01 vd_hea_d_medi_01 vd_wat_d_any_01 vd_tra_d_bus_01 vdpr_tra_d_app_pr_01 vd_pwr_d_all_01 vd_pwr_d_any_01 vdpr_pwr_d_oth_01 vd_pwr_d_agr_01 vd_pwr_d_dom_01 vd_fin_d_ac_soc_01)#year"
				}
				
				*pooled DD
				local dd_model = "pooled"
				reghdfe `yvar' dd_`dd_var'_treat_x_post `ifs', absorb(`fes') vce(cluster stdt)
				qui replace yvar = "`yvar'"	if _n==`row'
				qui replace dd_model = "`dd_model'" if _n==`row'
				qui replace dd_var = `dd_var'	if _n==`row'
				qui replace fes = "`fes'" if _n==`row'
				qui replace ifs = "`ifs'" if _n==`row'
				qui replace beta = _b[dd_`dd_var'_treat_x_post] if _n==`row'
				qui replace se = _se[dd_`dd_var'_treat_x_post]	if _n==`row'
				qui replace tscore = beta/se in `row'
				qui replace pvalue = 2*ttail(e(df_r),abs(_b[dd_`dd_var'_treat_x_post]/_se[dd_`dd_var'_treat_x_post])) in `row'
				qui replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
				qui replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
				qui sum `yvar' if e(sample)	
				qui replace ymean = r(mean)	if _n==`row'
				qui replace nobs = e(N)	if _n==`row'
				qui replace nclust = e(N_clust)	if _n==`row'
				qui replace rmse = e(rmse) if _n==`row'
				qui replace row_id	= `row'	if _n==`row'
				qui local row = `row' + 1
				
				* DD w/ pop bins
				local dd_model = "pop bins"
				local fes = subinstr("`fes'"," year ", " year#popbin ",1)
				reghdfe `yvar' dd_`dd_var'_x_popbin_*  `ifs', absorb(`fes') vce(cluster stdt)
				qui replace yvar = "`yvar'"	if _n==`row'
				qui replace dd_model = "`dd_model'" if _n==`row'
				qui replace dd_var = `dd_var'	if _n==`row'
				qui replace fes = "`fes'" if _n==`row'
				qui replace ifs = "`ifs'" if _n==`row'
				forvalues p = 0(`bin')`bmax' {	
					local p1 = `p'+`bin'
					if `p'==`bmax' {
						local p1 = "max"
					}
					qui replace beta_`p'_`p1' = _b[dd_`dd_var'_x_popbin_`p'_`p1'] if _n==`row'
					qui replace se_`p'_`p1' = _se[dd_`dd_var'_x_popbin_`p'_`p1'] if _n==`row'
					qui replace tscore_`p'_`p1' = beta_`p'_`p1'/se_`p'_`p1' in `row'
					qui replace pvalue_`p'_`p1' = 2*ttail(e(df_r),abs(_b[dd_`dd_var'_x_popbin_`p'_`p1']/_se[dd_`dd_var'_x_popbin_`p'_`p1'])) in `row'
					qui replace ci95_lo_`p'_`p1' = beta_`p'_`p1' - se_`p'_`p1'*invttail(e(df_r),0.025) in `row'
					qui replace ci95_hi_`p'_`p1' = beta_`p'_`p1' + se_`p'_`p1'*invttail(e(df_r),0.025) in `row'
				}
				qui sum `yvar' if e(sample)							
				qui replace ymean = r(mean)	if _n==`row'
				qui replace nobs = e(N)	if _n==`row'
				qui replace nclust = e(N_clust)	if _n==`row'
				qui replace rmse = e(rmse)	if _n==`row'
				qui replace row_id	= `row'	if _n==`row'
				qui local row = `row' + 1
			}
		}
	}
}

	// save results
keep yvar-row_id
keep if yvar!=""
compress
saveold "$results/dd_results_outcomes_dise_allyears.dta", replace

}

********************************************************************************
********************************************************************************

